# Load the business table, strip the first and last character from the name
# and address fields, and drop two neighborhoods. Rows whose neighborhood is
# NA are dropped as well, because filter() discards NA conditions.
business <- read_csv("./data/business.csv") %>%
  mutate(
    name = str_sub(name, 2, -2),
    address = str_sub(address, 2, -2)
  ) %>%
  filter(
    neighborhood != "Downtown Tampa",
    neighborhood != "North Valley"
  )

# Long table with one row per (business_id, category) pair.
# `categories` holds a ";"-delimited list. separate_rows() splits it without
# assuming a maximum number of categories per business; the earlier fixed
# 25-column split discarded anything past the 25th entry and padded shorter
# lists with NA rows.
categories <- business %>%
  select(business_id, categories) %>%
  separate_rows(categories, sep = ";") %>%
  rename(category = categories)
  
# IDs of every business that carries the exact category "Restaurants".
restaurant_ids <- distinct(
  filter(categories, category == "Restaurants"),
  business_id
)

# Keep businesses whose category list mentions a restaurant or food entry.
# The previous version tested the second pattern with
# `str_match(...) == TRUE`; str_match() returns the matched text (or NA),
# never TRUE, so ";Food" businesses were never kept. A single str_detect()
# with an alternation applies both patterns as intended. Rows with NA
# categories are still dropped by filter().
# NOTE(review): both patterns require a leading ";", so a category that is
# first in the list is not matched -- confirm this is intended.
restaurants <- business %>%
  filter(str_detect(categories, ";Restaurant|;Food"))
# Read the attributes table, normalise its column names, and keep only the
# alcohol attribute alongside the join key.
attributes <- janitor::clean_names(read_csv("./data/attributes.csv")) %>%
  select(business_id, alcohol)
# Joining the business and attributes datasets
# (merge conflict resolved: both sides carried the same code, so a single
# copy is kept -- running the join twice would duplicate the alcohol column)
restaurants <- restaurants %>%
  left_join(attributes, by = "business_id")

# Exploratory stuff: number of restaurants per neighborhood, smallest first
restaurants %>%
  group_by(neighborhood) %>%
  count() %>%
  arrange(n) %>%
  knitr::kable()
neighborhood n
Anthem 10
The Lakes 19
University 55
South Summerlin 64
Summerlin 95
Centennial 111
Sunrise 129
Northwest 143
Southwest 152
Chinatown 224
Downtown 277
Spring Valley 347
Eastside 357
Southeast 431
Westside 449
The Strip 613
# Cross-tabulate neighborhoods against Yelp star ratings: each cell is the
# number of distinct businesses with that rating in that neighborhood
# (NA where a combination does not occur).
restaurants %>%
  distinct(business_id, neighborhood, stars) %>%
  group_by(neighborhood, stars) %>%
  tally() %>%
  rename(my_count = n) %>%
  spread(stars, my_count) %>%
  knitr::kable()
neighborhood 1 1.5 2 2.5 3 3.5 4 4.5 5
Anthem NA NA 1 NA 2 3 2 2 NA
Centennial 1 4 4 14 25 29 21 11 2
Chinatown NA 1 6 9 35 59 65 42 7
Downtown 1 4 9 15 40 56 89 45 18
Eastside 1 9 24 38 58 72 100 48 7
Northwest NA 3 9 17 25 41 30 13 5
South Summerlin NA NA 1 4 11 24 23 1 NA
Southeast 1 9 34 52 62 104 107 45 17
Southwest 1 4 9 16 21 38 42 18 3
Spring Valley NA 4 18 28 44 84 87 72 10
Summerlin 1 2 6 5 22 25 23 9 2
Sunrise 1 9 15 14 20 23 28 18 1
The Lakes NA NA NA NA 3 5 8 3 NA
The Strip 1 10 28 81 125 174 147 38 9
University NA 1 2 3 10 16 13 9 1
Westside 2 9 14 38 60 111 129 65 21
# Mean review count per neighborhood, rendered as a table. review_count is
# coerced to numeric before averaging.
restaurants %>%
  group_by(neighborhood) %>%
  summarise(
    Average_Number_of_Reviews = mean(as.numeric(review_count))
  ) %>%
  knitr::kable()
neighborhood Average_Number_of_Reviews
Anthem 190.00000
Centennial 107.78378
Chinatown 190.02232
Downtown 174.64260
Eastside 134.10644
Northwest 80.65734
South Summerlin 211.89062
Southeast 116.44780
Southwest 144.33553
Spring Valley 144.63977
Summerlin 98.70526
Sunrise 36.75194
The Lakes 110.42105
The Strip 408.53507
University 89.81818
Westside 126.61025
# Mean and standard deviation of star ratings per neighborhood, ordered from
# the lowest average to the highest.
# (merge conflict resolved: both sides were identical, one copy kept)
restaurants %>%
  group_by(neighborhood) %>%
  summarize(avg = mean(stars), sd = sd(stars)) %>%
  arrange(avg) %>%
  knitr::kable()
neighborhood avg sd
Sunrise 3.240310 0.9418664
Centennial 3.333333 0.7929615
The Strip 3.364600 0.7062534
Northwest 3.405594 0.7781140
Summerlin 3.415789 0.7775187
Southeast 3.429234 0.8270031
Southwest 3.453947 0.8083899
Eastside 3.457983 0.8156988
South Summerlin 3.523438 0.4994417
Anthem 3.550000 0.7619420
University 3.572727 0.7227659
Westside 3.609131 0.7794122
Spring Valley 3.631124 0.7712010
Downtown 3.707581 0.7692319
Chinatown 3.712054 0.6668898
The Lakes 3.789474 0.4806185

Column

Center of Las Vegas: 36.1699° N, 115.1398° W. The Plotly map below covers the area within 0.5 degrees of latitude and longitude of this center.

# Scatter of restaurant coordinates, coloured by star rating. Only points
# strictly inside the 35.6699 to 36.6699 latitude and -115.6398 to -114.6398
# longitude window are drawn.
restaurants %>%
  filter(
    latitude > 35.6699, latitude < 36.6699,
    longitude > -115.6398, longitude < -114.6398
  ) %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    color = ~stars,
    type = "scatter",
    mode = "markers",
    alpha = 0.5,
    hoverinfo = "text",
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "stars on Yelp")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )

Creating a plotly of restaurant locations

Center of Las Vegas: 36.1699° N, 115.1398° W. The Plotly map below covers the area within 0.5 degrees of latitude and longitude of this center.

# Scatter of restaurant coordinates, coloured by star rating. Only points
# strictly inside the 35.6699 to 36.6699 latitude and -115.6398 to -114.6398
# longitude window are drawn.
restaurants %>%
  filter(
    latitude > 35.6699, latitude < 36.6699,
    longitude > -115.6398, longitude < -114.6398
  ) %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    color = ~stars,
    type = "scatter",
    mode = "markers",
    alpha = 0.5,
    hoverinfo = "text",
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "stars on Yelp")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )

Creating a plotly of restaurant review counts by their rating on Yelp

# Bar chart of review counts by Yelp star rating.
# Stars are converted to character labels so each rating is treated as a
# discrete group. The earlier nested if_else() chain had no branch for 3.5,
# so 3.5-star restaurants fell through to the final "5" label;
# as.character() labels every rating with its own value.
restaurants %>%
  mutate(
    stars = as.character(stars),
    review_count = as.numeric(review_count)
  ) %>%
  group_by(stars) %>%
  plot_ly(x = ~stars, y = ~review_count, color = ~stars, type = "bar", colors = "Set3") %>%
  layout(xaxis = list(title = "Stars"),
         yaxis = list(title = "Number of Reviews"))
Plots of Restaurants
# Category rows for businesses tagged "Restaurants" or "Food", restricted to
# a fixed list of popular cuisine and venue types. A business appears once
# per matching category.
popular <- categories %>%
  filter(category %in% c("Restaurants", "Food")) %>%
  distinct(business_id) %>%
  left_join(categories, by = "business_id") %>%
  filter(
    category %in% c(
      "Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican",
      "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses",
      "Fast Food"
    )
  )

# Stacked bar chart: number of restaurants of each popular category in every
# neighborhood.
# - The join key is spelled out so the join cannot silently pick up any other
#   column name the two tables might come to share.
# - The "Set3" palette is requested explicitly: the default qualitative
#   palette (Set2) has only 8 colours and triggers an RColorBrewer warning
#   for the 11 categories plotted here.
restaurants %>%
  select(business_id, neighborhood) %>%
  inner_join(popular, by = "business_id") %>%
  distinct() %>%
  group_by(neighborhood, category) %>%
  tally() %>%
  plotly::plot_ly(x = ~neighborhood, y = ~n, type = "bar", color = ~category,
                  colors = "Set3", hoverinfo = "text",
                  text = ~paste(neighborhood, " has ",
                                n, " ", category, " restaurants.")) %>%
  layout(yaxis = list(title = "Restaurants"),
         xaxis = list(title = "", tickangle = -45),
         barmode = "stack")
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Average star rating for each popular category, highest first.
# The join relies on the columns the two tables share.
restaurants %>%
  inner_join(popular) %>%
  group_by(category) %>%
  summarise(avg_stars = mean(stars)) %>%
  arrange(desc(avg_stars)) %>%
  knitr::kable()
## Joining, by = "business_id"
category avg_stars
Sushi Bars 3.803977
Steakhouses 3.791096
Salad 3.742958
Breakfast & Brunch 3.626866
Bars 3.600000
Italian 3.558333
Mexican 3.537471
Chinese 3.372047
Pizza 3.369863
Chicken Wings 3.174497
Fast Food 2.855289

Geographic Plot by Categories

# Scatter of restaurant coordinates, coloured by popular category. A
# restaurant listed under several popular categories is drawn once per
# category.
# - The join key is spelled out so the join cannot silently pick up any other
#   column name the two tables might come to share.
# - The "Set3" palette is requested explicitly: the default qualitative
#   palette (Set2) has only 8 colours and triggers an RColorBrewer warning
#   for the 11 categories plotted here.
restaurants %>%
  inner_join(popular, by = "business_id") %>%
  plot_ly(x = ~longitude, y = ~latitude, type = "scatter", mode = "markers",
          alpha = 0.9,
          color = ~category, colors = "Set3", hoverinfo = "text",
          text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "star", category, "on Yelp.")) %>%
  layout(xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Column

Plots of Restaurants

# Category rows for businesses tagged "Restaurants" or "Food", restricted to
# a fixed list of popular cuisine and venue types. A business appears once
# per matching category.
popular <- categories %>%
  filter(category %in% c("Restaurants", "Food")) %>%
  distinct(business_id) %>%
  left_join(categories, by = "business_id") %>%
  filter(
    category %in% c(
      "Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican",
      "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses",
      "Fast Food"
    )
  )

# Stacked bar chart: number of restaurants of each popular category in every
# neighborhood.
# - The join key is spelled out so the join cannot silently pick up any other
#   column name the two tables might come to share.
# - The "Set3" palette is requested explicitly: the default qualitative
#   palette (Set2) has only 8 colours and triggers an RColorBrewer warning
#   for the 11 categories plotted here.
restaurants %>%
  select(business_id, neighborhood) %>%
  inner_join(popular, by = "business_id") %>%
  distinct() %>%
  group_by(neighborhood, category) %>%
  tally() %>%
  plotly::plot_ly(x = ~neighborhood, y = ~n, type = "bar", color = ~category,
                  colors = "Set3", hoverinfo = "text",
                  text = ~paste(neighborhood, " has ",
                                n, " ", category, " restaurants.")) %>%
  layout(yaxis = list(title = "Restaurants"),
         xaxis = list(title = "", tickangle = -45),
         barmode = "stack")

Geographic Plot by Categories

# Scatter of restaurant coordinates, coloured by popular category. A
# restaurant listed under several popular categories is drawn once per
# category.
# - The join key is spelled out so the join cannot silently pick up any other
#   column name the two tables might come to share.
# - The "Set3" palette is requested explicitly: the default qualitative
#   palette (Set2) has only 8 colours and triggers an RColorBrewer warning
#   for the 11 categories plotted here.
restaurants %>%
  inner_join(popular, by = "business_id") %>%
  plot_ly(x = ~longitude, y = ~latitude, type = "scatter", mode = "markers",
          alpha = 0.9,
          color = ~category, colors = "Set3", hoverinfo = "text",
          text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "star", category, "on Yelp.")) %>%
  layout(xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))